# setwd("~/Dropbox/T&G project survey/Ukraine/replication_JOP")
# libraries
library(tidyverse)
library(cobalt)
library(patchwork)
library(paletteer)
source("func.R")

# ggplot defaults: every text-drawing geom uses the same font family, and
# theme_nice() becomes the session-wide default theme
walk(
  c("text", "label"),
  function(geom) update_geom_defaults(geom, list(family = "Archivo Narrow"))
)
theme_set(theme_nice())

## Load data
df = readr::read_rds("data/clean_data.rds")

## dictionary
# Lookup table from raw variable names (dirty) to the labels shown in the
# figures (clean). The two vectors are paired by position.
dict = tibble(
  dirty = c(
    "sexo_2",
    "edad",
    "edu",
    "clase_social_r",
    "hab_r_2",
    "educacion_r",
    "Q12_2",
    "Q14",
    "ideo"
  ),
  clean = c(
    "Sex (female)",
    "Age",
    "Education",
    "Social class\n(scale)",
    "Location\n(over/below 10,000 residents)",
    "Education\n(scale)",
    "Voted in last elections",
    "Income\n(scale)",
    "Ideology\n(scale)"
  )
)

## Descriptives

# Interviews per day

# Keep the wave indicator and the interview date, dropping rows without a date
dfdates = df %>%
  select(post, date) %>%
  mutate(date = as.Date(date)) %>%
  filter(!is.na(date))

# Date used for the reference marker and for placing its label
invasion_date = as.Date("2022-02-24")

# Daily histogram of interviews with a dashed red marker at the invasion date.
# The marker is drawn with annotate(): a geom_segment() with constant aes()
# values would inherit dfdates and draw one overlapping copy per row.
dates = ggplot(dfdates, aes(x = date)) +
  geom_histogram(binwidth = 1, position = "dodge2", color = "white") +
  theme(legend.position = "top") +
  scale_x_date(date_breaks = "3 days", date_labels = "%b %d") +
  annotate(geom = "segment", x = invasion_date, xend = invasion_date,
    y = 0, yend = 1800, linetype = "dashed", color = "red") +
  annotate(geom = "text", x = invasion_date + 1, color = "red", hjust = 0,
    y = 1700, label = "February 24: Russian\ninvasion starts") +
  labs(x = "", y = "Interviews per day")

# Save the stored plot explicitly instead of relying on last_plot()
ggsave("figures/hist_dates.pdf", plot = dates, height = 4, width = 7,
  device = cairo_pdf)

## Inspecting Balance

# make balance plots

# Take the data behind cobalt's love.plot() for the pre/post comparison, attach
# the readable labels from dict, and flag each covariate as balanced when its
# absolute statistic is below .1 (the same cut-off drawn as dashed lines below).
pdat_diff = love.plot(post ~ sexo + edad + clase_social_r + ideo +
            hab_r + educacion_r + Q12 + Q14, data = df,
          thresholds = c(m = .1, v = 2))$data %>%
  tibble() %>%
  left_join(dict, by = c("var" = "dirty")) %>%
  mutate(
    clean = factor(clean),
    balanced = ifelse(abs(stat) < .1, "Balanced", "Unbalanced"),
    statistic = "Standardized mean differences"
  )

# Dot plot of the statistic per covariate, coloured by the balance flag
p1 = pdat_diff %>%
  ggplot(aes(y = clean, x = stat, color = balanced)) +
  geom_point(size = 2.2) +
  geom_vline(xintercept = 0) +
  geom_vline(xintercept = -.1, lty = 2) +
  geom_vline(xintercept = .1, lty = 2) +
  coord_cartesian(xlim = c(-1, 1)) +
  labs(x = "Standardized mean differences", y = NULL,
       color = NULL) +
  theme(legend.position = "top") +
  # The palette must be a plain "package::palette" string; a backtick-quoted
  # name is parsed as a symbol that refers to no object.
  scale_color_paletteer_d("wesanderson::Darjeeling1", direction = -1)

ggsave("figures/pre_matching_balance.pdf", plot = p1, device = cairo_pdf)


# plot individual variables that are "off"
# Density of edad, ideo and Q14 by sample, one facet per variable
off_vars = df %>%
  select(post, edad, ideo, Q14) %>%
  # long format: one row per respondent-variable pair (columns name, value)
  pivot_longer(-post) %>%
  left_join(dict, by = c("name" = "dirty")) %>%
  # label the two samples; fct_rev() reverses the level order of the labels
  mutate(post = ifelse(post == 0, "Pre-invasion", "Post-invasion"),
         post = fct_rev(post)) %>%
  ggplot(aes(x = value, fill = post)) +
  geom_density(alpha = .8, color = "white") +
  facet_wrap(vars(clean), scales = "free") +
  theme(legend.position = "top") +
  # The palette must be a plain "package::palette" string; a backtick-quoted
  # name is parsed as a symbol that refers to no object.
  scale_fill_paletteer_d("wesanderson::Darjeeling1", direction = -1) +
  labs(x = NULL, y = NULL, fill = "Sample:")

# Save the stored plot explicitly instead of relying on last_plot()
ggsave("figures/sample_imbalance.pdf", plot = off_vars, height = 5, width = 7,
  device = cairo_pdf)
